Example 3: Harmonized Landsat-Sentinel#

Accessing + organizing (tidying) HLS dataset

import geopandas as gpd
import pandas as pd

# mamba install odc-stac odc-geo pyarrow=10 -c conda-forge
import odc
from odc.geo.geobox import GeoBox
import odc.stac

import pystac_client
#import planetary_computer

import holoviews as hv
import hvplot.pandas
import hvplot.xarray

import xarray as xr

import fsspec
import ast
import numpy as np

import pystac

import os
# Open a STAC API client against NASA's CMR-STAC endpoint for LP DAAC cloud holdings
stac_client = pystac_client.Client.open('https://cmr.earthdata.nasa.gov/cloudstac/LPCLOUD')
%%time

search = stac_client.search(
    collections = ['HLSS30.v2.0', 'HLSL30.v2.0'],
    intersects=dict(type="Point", coordinates=[-105.78, 35.79]), # Pecos Mtns NM
    datetime = '2022/2024',
)

items = search.get_all_items()
print(f'Returned {len(items)} items')
Returned 250 items
CPU times: user 153 ms, sys: 0 ns, total: 153 ms
Wall time: 11.9 s
# Tidy the STAC items into a GeoDataFrame of footprints + properties;
# STAC geometries are always WGS84, hence EPSG:4326
gf = gpd.GeoDataFrame.from_features(items.to_dict(), crs='EPSG:4326')
gf
geometry eo:cloud_cover datetime start_datetime end_datetime
0 POLYGON ((-106.09829 35.14991, -105.85526 35.1... 100 2022-01-01T18:04:02.903000Z 2022-01-01T18:04:02.903Z 2022-01-01T18:04:02.903Z
1 POLYGON ((-106.09829 35.14991, -104.89284 35.1... 6 2022-01-03T17:54:13.056000Z 2022-01-03T17:54:13.056Z 2022-01-03T17:54:13.056Z
2 POLYGON ((-106.09829 35.14991, -104.89284 35.1... 85 2022-01-06T17:38:44.248000Z 2022-01-06T17:38:44.248Z 2022-01-06T17:39:08.139Z
3 POLYGON ((-106.09829 35.14991, -105.85855 35.1... 34 2022-01-06T18:04:08.477000Z 2022-01-06T18:04:08.477Z 2022-01-06T18:04:08.477Z
4 POLYGON ((-106.09829 35.14991, -104.89284 35.1... 19 2022-01-08T17:54:07.404000Z 2022-01-08T17:54:07.404Z 2022-01-08T17:54:07.404Z
... ... ... ... ... ...
245 POLYGON ((-106.09829 35.14991, -105.87041 35.1... 16 2023-05-16T18:04:11.291000Z 2023-05-16T18:04:11.291Z 2023-05-16T18:04:11.291Z
246 POLYGON ((-106.09829 35.14991, -104.89284 35.1... 60 2023-05-17T17:37:44.712000Z 2023-05-17T17:37:44.712Z 2023-05-17T17:38:08.603Z
247 POLYGON ((-106.09829 35.14991, -104.89284 35.1... 59 2023-05-18T17:54:13.335000Z 2023-05-18T17:54:13.335Z 2023-05-18T17:54:13.335Z
248 POLYGON ((-106.09829 35.14991, -105.86778 35.1... 83 2023-05-21T18:04:09.689000Z 2023-05-21T18:04:09.689Z 2023-05-21T18:04:09.689Z
249 POLYGON ((-106.09829 35.14991, -104.89284 35.1... 42 2023-05-23T17:54:16.318000Z 2023-05-23T17:54:16.318Z 2023-05-23T17:54:16.318Z

250 rows × 5 columns

# Instead of re-doing searches, can save and reload
# items.save_object('results.json')  # (method name is save_object, not save_object_object)
# items = pystac.ItemCollection.from_file('results.json')
# gf = gpd.read_file('results.json')
# Canonical STAC API URL of the first item
items[0].self_href
'https://cmr.earthdata.nasa.gov/cloudstac/LPCLOUD/collections/HLSS30.v2.0/items/HLS.S30.T13SDV.2022001T175739.v2.0'
# Direct URL of one asset (B03 = green for S30, per the common_names mapping used later)
items[0].assets['B03'].href
'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T13SDV.2022001T175739.v2.0/HLS.S30.T13SDV.2022001T175739.v2.0.B03.tif'
items[0]
# Keep each STAC item id alongside its footprint row
gf['stac_id'] = [item.id for item in items]
# Parse the ISO-8601 datetime strings into pandas Timestamps
gf['time'] = pd.to_datetime(gf.datetime)
gf.head()
geometry eo:cloud_cover datetime start_datetime end_datetime stac_id time
0 POLYGON ((-106.09829 35.14991, -105.85526 35.1... 100 2022-01-01T18:04:02.903000Z 2022-01-01T18:04:02.903Z 2022-01-01T18:04:02.903Z HLS.S30.T13SDV.2022001T175739.v2.0 2022-01-01 18:04:02.903000+00:00
1 POLYGON ((-106.09829 35.14991, -104.89284 35.1... 6 2022-01-03T17:54:13.056000Z 2022-01-03T17:54:13.056Z 2022-01-03T17:54:13.056Z HLS.S30.T13SDV.2022003T174731.v2.0 2022-01-03 17:54:13.056000+00:00
2 POLYGON ((-106.09829 35.14991, -104.89284 35.1... 85 2022-01-06T17:38:44.248000Z 2022-01-06T17:38:44.248Z 2022-01-06T17:39:08.139Z HLS.L30.T13SDV.2022006T173844.v2.0 2022-01-06 17:38:44.248000+00:00
3 POLYGON ((-106.09829 35.14991, -105.85855 35.1... 34 2022-01-06T18:04:08.477000Z 2022-01-06T18:04:08.477Z 2022-01-06T18:04:08.477Z HLS.S30.T13SDV.2022006T175731.v2.0 2022-01-06 18:04:08.477000+00:00
4 POLYGON ((-106.09829 35.14991, -104.89284 35.1... 19 2022-01-08T17:54:07.404000Z 2022-01-08T17:54:07.404Z 2022-01-08T17:54:07.404Z HLS.S30.T13SDV.2022008T174719.v2.0 2022-01-08 17:54:07.404000+00:00
# Count Landsat-derived items (HLS.L30 product)
gf[gf.stac_id.str.startswith('HLS.L30')].shape
(60, 7)
# Count Sentinel-derived items (HLS.S30 product)
gf[gf.stac_id.str.startswith('HLS.S30')].shape
(190, 7)
#pull out Sentinel or Landsat flag (character 4 of the id: 'HLS.S30...' -> 'S')
gf['platform'] = gf.stac_id.str[4]
#MGRS is military grid reference system, used for HLS
# https://hls.gsfc.nasa.gov/products-description/tiling-system/
gf['mgrs'] = gf.stac_id.str[9:14]
gf.platform.value_counts()
platform
S    190
L     60
Name: count, dtype: int64
# Index by acquisition time for time-series plotting
df = gf.set_index('time').tz_localize(None) #drop tz info (always utc)
# Keep only the columns needed for the timeline plot below
df = df.loc[:,['platform','eo:cloud_cover']]
df.head()
platform eo:cloud_cover
time
2022-01-01 18:04:02.903 S 100
2022-01-03 17:54:13.056 S 6
2022-01-06 17:38:44.248 L 85
2022-01-06 18:04:08.477 S 34
2022-01-08 17:54:07.404 S 19
# Timeline of acquisitions per platform, colored by cloud cover percentage
df.hvplot.scatter(y='platform', c='eo:cloud_cover', marker='s',width=1000)
#sentinel
iS = items[0]
#landsat
# NOTE(review): items[-1] is assumed to be a Landsat item here, but items are in
# search order -- verify iL.id actually starts with 'HLS.L' before relying on it
iL = items[-1]
#take a look at sentinel assets 
#what do the keys mean? 
# Keys are HLS band codes (B01..B12, B8A), view/sun angle rasters (VZA/VAA/SZA/SAA),
# the Fmask QA layer, plus browse image and metadata assets
iS.assets.keys()
dict_keys(['B11', 'B09', 'VZA', 'B03', 'B04', 'VAA', 'B06', 'Fmask', 'B08', 'B12', 'B01', 'B07', 'B10', 'B8A', 'SAA', 'SZA', 'B05', 'B02', 'browse', 'metadata'])
# Direct URL of one asset (B11 = swir16 for S30, per the common_names mapping used later)
iS.assets['B11'].href
'https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T13SDV.2022001T175739.v2.0/HLS.S30.T13SDV.2022001T175739.v2.0.B11.tif'

Not sure exactly what this section is doing? Let's investigate below#

#what is this step doing? let's see
# Look up the footprint of the MGRS tile covering our point of interest
# in a pre-built parquet table of MGRS land tiles

MGRS_TILE = gf.mgrs.iloc[0]
url = 'https://github.com/scottyhq/mgrs/raw/main/MGRS_LAND.parquet'
with fsspec.open(url) as file:
    gfMGRS = gpd.read_parquet(file)
# Select the single row whose tile id matches (query binds @MGRS_TILE from local scope)
aoi = gfMGRS.query('  tile == @MGRS_TILE  ') #returns a gdf
aoi.explore()
Make this Notebook Trusted to load map: File -> Trust Notebook
s = aoi.iloc[0] #extract series from gdf
s
tile                                                      13SDV
geometry      POLYGON ((-106.1119250922 36.1397366041, -104....
epsg                                                      32613
utm_wkt       POLYGON ((399960 4000020,399960 3890220,509760...
utm_bounds           (399960.0, 3890220.0, 509760.0, 4000020.0)
Name: 1461, dtype: object
# GeoJSON-like mapping of the tile footprint geometry
GEOMETRY = s.geometry.__geo_interface__
# ast.literal_eval safely parses the bounds STRING "(399960.0, ...)" stored in the
# parquet table into an actual Python tuple of floats
BOUNDS = ast.literal_eval(s.utm_bounds) #what does the ast.literal_eval do here? 
#https://www.educative.io/answers/what-is-astliteralevalnodeorstring-in-python
BOUNDS
(399960.0, 3890220.0, 509760.0, 4000020.0)
EPSG = str(s.epsg)
# Target grid: the MGRS tile footprint in its native UTM CRS, at 500 m resolution
GRID = GeoBox.from_bbox(BOUNDS,
                        crs=EPSG,
                        resolution = 500,
                       )
GRID

GeoBox

Dimensions
221x221
EPSG
32613
Resolution
500m
Cell
50px
WKT
PROJCRS["WGS 84 / UTM zone 13N",
    BASEGEOGCRS["WGS 84",
        ENSEMBLE["World Geodetic System 1984 ensemble",
            MEMBER["World Geodetic System 1984 (Transit)"],
            MEMBER["World Geodetic System 1984 (G730)"],
            MEMBER["World Geodetic System 1984 (G873)"],
            MEMBER["World Geodetic System 1984 (G1150)"],
            MEMBER["World Geodetic System 1984 (G1674)"],
            MEMBER["World Geodetic System 1984 (G1762)"],
            MEMBER["World Geodetic System 1984 (G2139)"],
            ELLIPSOID["WGS 84",6378137,298.257223563,
                LENGTHUNIT["metre",1]],
            ENSEMBLEACCURACY[2.0]],
        PRIMEM["Greenwich",0,
            ANGLEUNIT["degree",0.0174532925199433]],
        ID["EPSG",4326]],
    CONVERSION["UTM zone 13N",
        METHOD["Transverse Mercator",
            ID["EPSG",9807]],
        PARAMETER["Latitude of natural origin",0,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8801]],
        PARAMETER["Longitude of natural origin",-105,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8802]],
        PARAMETER["Scale factor at natural origin",0.9996,
            SCALEUNIT["unity",1],
            ID["EPSG",8805]],
        PARAMETER["False easting",500000,
            LENGTHUNIT["metre",1],
            ID["EPSG",8806]],
        PARAMETER["False northing",0,
            LENGTHUNIT["metre",1],
            ID["EPSG",8807]]],
    CS[Cartesian,2],
        AXIS["(E)",east,
            ORDER[1],
            LENGTHUNIT["metre",1]],
        AXIS["(N)",north,
            ORDER[2],
            LENGTHUNIT["metre",1]],
    USAGE[
        SCOPE["Navigation and medium accuracy spatial referencing."],
        AREA["Between 108°W and 102°W, northern hemisphere between equator and 84°N, onshore and offshore. Canada - Northwest Territories (NWT); Nunavut; Saskatchewan. Mexico. United States (USA)."],
        BBOX[0,-108,84,-102]],
    ID["EPSG",32613]]

Separating L, S items from STAC collection#

Need to separate them because the band naming schemes differ between the two sensors

# Landsat-derived items (L30 product)
itemsL8 = [i for i in items if i.id.startswith('HLS.L')]
# Sentinel-derived items (S30 product)
itemsS2 = [i for i in items if i.id.startswith('HLS.S')]
len(itemsL8)
60
len(itemsS2)
190
# NOTE(review): stale scratch arithmetic -- the actual counts are 60 + 190 = 250
59+185
244
# Map sensor-specific HLS band codes to STAC common band names so the two
# products can share variable names after loading.
# https://github.com/stac-extensions/eo#common-band-names

_COMMON_BANDS = ('green', 'red', 'nir08', 'swir16')
common_names = {
    'L8': dict(zip(('B03', 'B04', 'B05', 'B06'), _COMMON_BANDS)),
    'S2': dict(zip(('B03', 'B04', 'B8A', 'B11'), _COMMON_BANDS)),
}

Now loading items into xarray#

# Lazily (dask-backed, 256x256 chunks) load the Landsat items onto the common
# 500 m UTM grid defined by GRID
L = odc.stac.load(
    itemsL8,
    chunks = {'x':256,'y':256},
    geobox=GRID
)
L
<xarray.Dataset>
Dimensions:      (y: 221, x: 221, time: 60)
Coordinates:
  * y            (y) float64 4e+06 4e+06 3.999e+06 ... 3.891e+06 3.89e+06
  * x            (x) float64 3.998e+05 4.002e+05 ... 5.092e+05 5.098e+05
    spatial_ref  int32 32613
  * time         (time) datetime64[ns] 2022-01-06T17:38:44.248000 ... 2023-05...
Data variables: (12/16)
    B10          (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    B03          (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    B04          (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    Fmask        (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    B05          (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    B07          (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    ...           ...
    B02          (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    SAA          (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    B11          (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    VZA          (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    SZA          (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    browse       (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
#browse images not georeferenced ? 
# NOTE(review): this raises AssertionError (traceback below) -- the browse asset has
# no geotransform (see the NotGeoreferencedWarning), so odc cannot reproject it to GRID
L.isel(time=1).browse.plot()
/home/emmamarshall/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/rasterio/__init__.py:304: NotGeoreferencedWarning: Dataset has no geotransform, gcps, or rpcs. The identity matrix will be returned.
  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
Cell In[43], line 2
      1 #browse images not georeferenced ? 
----> 2 L.isel(time=1).browse.plot()

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/xarray/plot/accessor.py:47, in DataArrayPlotAccessor.__call__(self, **kwargs)
     45 @functools.wraps(dataarray_plot.plot, assigned=("__doc__", "__annotations__"))
     46 def __call__(self, **kwargs) -> Any:
---> 47     return dataarray_plot.plot(self._da, **kwargs)

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/xarray/plot/dataarray_plot.py:267, in plot(darray, row, col, col_wrap, ax, hue, subplot_kws, **kwargs)
    218 def plot(
    219     darray: DataArray,
    220     *,
   (...)
    227     **kwargs: Any,
    228 ) -> Any:
    229     """
    230     Default plot of DataArray using :py:mod:`matplotlib:matplotlib.pyplot`.
    231 
   (...)
    265     xarray.DataArray.squeeze
    266     """
--> 267     darray = darray.squeeze().compute()
    269     plot_dims = set(darray.dims)
    270     plot_dims.discard(row)

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/xarray/core/dataarray.py:1096, in DataArray.compute(self, **kwargs)
   1077 """Manually trigger loading of this array's data from disk or a
   1078 remote source into memory and return a new array. The original is
   1079 left unaltered.
   (...)
   1093 dask.compute
   1094 """
   1095 new = self.copy(deep=False)
-> 1096 return new.load(**kwargs)

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/xarray/core/dataarray.py:1070, in DataArray.load(self, **kwargs)
   1052 def load(self: T_DataArray, **kwargs) -> T_DataArray:
   1053     """Manually trigger loading of this array's data from disk or a
   1054     remote source into memory and return this array.
   1055 
   (...)
   1068     dask.compute
   1069     """
-> 1070     ds = self._to_temp_dataset().load(**kwargs)
   1071     new = self._from_temp_dataset(ds)
   1072     self._variable = new._variable

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/xarray/core/dataset.py:752, in Dataset.load(self, **kwargs)
    749 import dask.array as da
    751 # evaluate all the dask arrays simultaneously
--> 752 evaluated_data = da.compute(*lazy_data.values(), **kwargs)
    754 for k, data in zip(lazy_data, evaluated_data):
    755     self.variables[k].data = data

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/dask/base.py:599, in compute(traverse, optimize_graph, scheduler, get, *args, **kwargs)
    596     keys.append(x.__dask_keys__())
    597     postcomputes.append(x.__dask_postcompute__())
--> 599 results = schedule(dsk, keys, **kwargs)
    600 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/dask/threaded.py:89, in get(dsk, keys, cache, num_workers, pool, **kwargs)
     86     elif isinstance(pool, multiprocessing.pool.Pool):
     87         pool = MultiprocessingPoolExecutor(pool)
---> 89 results = get_async(
     90     pool.submit,
     91     pool._max_workers,
     92     dsk,
     93     keys,
     94     cache=cache,
     95     get_id=_thread_get_id,
     96     pack_exception=pack_exception,
     97     **kwargs,
     98 )
    100 # Cleanup pools associated to dead threads
    101 with pools_lock:

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/dask/local.py:511, in get_async(submit, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, chunksize, **kwargs)
    509         _execute_task(task, data)  # Re-execute locally
    510     else:
--> 511         raise_exception(exc, tb)
    512 res, worker_id = loads(res_info)
    513 state["cache"][key] = res

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/dask/local.py:319, in reraise(exc, tb)
    317 if exc.__traceback__ is not tb:
    318     raise exc.with_traceback(tb)
--> 319 raise exc

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/dask/local.py:224, in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
    222 try:
    223     task, data = loads(task_info)
--> 224     result = _execute_task(task, data)
    225     id = get_id()
    226     result = dumps((result, id))

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/dask/core.py:119, in _execute_task(arg, cache, dsk)
    115     func, args = arg[0], arg[1:]
    116     # Note: Don't assign the subtask results to a variable. numpy detects
    117     # temporaries by their reference count and can execute certain
    118     # operations in-place.
--> 119     return func(*(_execute_task(a, cache) for a in args))
    120 elif not ishashable(arg):
    121     return arg

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/dask/core.py:119, in <genexpr>(.0)
    115     func, args = arg[0], arg[1:]
    116     # Note: Don't assign the subtask results to a variable. numpy detects
    117     # temporaries by their reference count and can execute certain
    118     # operations in-place.
--> 119     return func(*(_execute_task(a, cache) for a in args))
    120 elif not ishashable(arg):
    121     return arg

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/dask/core.py:119, in _execute_task(arg, cache, dsk)
    115     func, args = arg[0], arg[1:]
    116     # Note: Don't assign the subtask results to a variable. numpy detects
    117     # temporaries by their reference count and can execute certain
    118     # operations in-place.
--> 119     return func(*(_execute_task(a, cache) for a in args))
    120 elif not ishashable(arg):
    121     return arg

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/odc/stac/_load.py:671, in _dask_loader_tyx(srcs, gbt, iyx, cfg, env)
    669 chunk = np.empty(gbox.shape.yx, dtype=cfg.dtype)
    670 with rio_env(**env):
--> 671     return _fill_2d_slice(srcs, gbox, cfg, chunk)[np.newaxis]

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/odc/stac/_load.py:698, in _fill_2d_slice(srcs, dst_gbox, cfg, dst)
    695     return dst
    697 src, *rest = srcs
--> 698 _roi, pix = rio_read(src, cfg, dst_gbox, dst=dst)
    700 for src in rest:
    701     # first valid pixel takes precedence over others
    702     _roi, pix = rio_read(src, cfg, dst_gbox)

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/odc/stac/_reader.py:186, in rio_read(src, cfg, dst_geobox, dst)
    163 """
    164 Internal read method.
    165 
   (...)
    182 
    183 """
    185 try:
--> 186     return _rio_read(src, cfg, dst_geobox, dst)
    187 except rasterio.errors.RasterioIOError as e:
    188     if cfg.fail_on_error:

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/odc/stac/_reader.py:226, in _rio_read(src, cfg, dst_geobox, dst)
    223 if src.band > rdr.count:
    224     raise ValueError(f"No band {src.band} in '{src.uri}'")
--> 226 rr = _reproject_info_from_rio(rdr, dst_geobox, ttol=ttol)
    228 if cfg.use_overviews and rr.read_shrink > 1:
    229     ovr_idx = _pick_overview(rr.read_shrink, rdr.overviews(src.band))

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/odc/stac/_reader.py:100, in _reproject_info_from_rio(rdr, dst_geobox, ttol)
     97 def _reproject_info_from_rio(
     98     rdr: rasterio.DatasetReader, dst_geobox: GeoBox, ttol: float
     99 ) -> ReprojectInfo:
--> 100     return compute_reproject_roi(rio_geobox(rdr), dst_geobox, ttol=ttol)

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/odc/geo/overlap.py:489, in compute_reproject_roi(src, dst, ttol, stol, padding, align)
    485 # pylint: disable=too-many-locals
    487 pts_per_side = 5
--> 489 tr = native_pix_transform(src, dst)
    491 if tr.linear is None:
    492     padding = 1 if padding is None else padding

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/odc/geo/overlap.py:337, in native_pix_transform(src, dst)
    334 if isinstance(src, GeoBox) and isinstance(dst, GeoBox) and src.crs == dst.crs:
    335     return _same_crs_pix_transform(src, dst)
--> 337 return GbxPointTransform(src, dst)

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/odc/geo/overlap.py:105, in GbxPointTransform.__init__(self, src, dst, back)
     99 def __init__(
    100     self,
    101     src: GeoBoxBase,
    102     dst: GeoBoxBase,
    103     back: Optional["GbxPointTransform"] = None,
    104 ):
--> 105     assert src.crs is not None and dst.crs is not None
    106     self._src = src
    107     self._dst = dst

AssertionError: 
# Load ALL items (both sensors) onto the target grid.
# Fix: the original passed `chunk=` (a typo) -- odc.stac.load's parameter is
# `chunks=`; the misspelled kwarg was silently absorbed by **kw, so the load
# ran eagerly and immediately tried to read the remote tifs.
# NOTE(review): that eager read failed with RasterioIOError (traceback below) --
# presumably because the GDAL/Earthdata auth settings (cookie env vars, set
# further down in this notebook) were not yet configured; confirm before
# relying on this cell.
HLS = odc.stac.load(
    items,
    chunks={'x':256,'y':256},
    geobox=GRID
)
Aborting load due to failure while reading: https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T13SDV.2022001T175739.v2.0/HLS.S30.T13SDV.2022001T175739.v2.0.B11.tif:1
---------------------------------------------------------------------------
CPLE_OpenFailedError                      Traceback (most recent call last)
File rasterio/_base.pyx:308, in rasterio._base.DatasetBase.__init__()

File rasterio/_base.pyx:219, in rasterio._base.open_dataset()

File rasterio/_err.pyx:221, in rasterio._err.exc_wrap_pointer()

CPLE_OpenFailedError: '/vsicurl/https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T13SDV.2022001T175739.v2.0/HLS.S30.T13SDV.2022001T175739.v2.0.B11.tif' not recognized as a supported file format.

During handling of the above exception, another exception occurred:

RasterioIOError                           Traceback (most recent call last)
Cell In[44], line 1
----> 1 HLS = odc.stac.load(
      2     items,
      3     chunk={'x':256,'y':256},
      4     geobox=GRID
      5 )

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/odc/stac/_load.py:610, in load(items, bands, groupby, resampling, dtype, chunks, pool, crs, resolution, anchor, geobox, bbox, lon, lat, x, y, like, geopolygon, progress, fail_on_error, stac_cfg, patch_url, preserve_original_order, **kw)
    607 if progress is not None:
    608     _work = progress(SizedIterable(_work, total_tasks))
--> 610 for _ in _work:
    611     pass
    613 return _with_debug_info(ds, tasks=_tasks)

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/odc/stac/_utils.py:38, in pmap(func, inputs, pool)
     34 """
     35 Wrapper for ThreadPoolExecutor.map
     36 """
     37 if pool is None:
---> 38     yield from map(func, inputs)
     39     return
     41 if isinstance(pool, int):

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/odc/stac/_load.py:601, in load.<locals>._do_one(task)
    595 srcs = [
    596     src
    597     for src in (_parsed[idx].get(band, None) for idx, band in task.srcs)
    598     if src is not None
    599 ]
    600 with rio_env(**_rio_env):
--> 601     _ = _fill_2d_slice(srcs, task.dst_gbox, task.cfg, dst_slice)
    602 t, y, x = task.idx_tyx
    603 return (task.band, t, y, x)

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/odc/stac/_load.py:698, in _fill_2d_slice(srcs, dst_gbox, cfg, dst)
    695     return dst
    697 src, *rest = srcs
--> 698 _roi, pix = rio_read(src, cfg, dst_gbox, dst=dst)
    700 for src in rest:
    701     # first valid pixel takes precedence over others
    702     _roi, pix = rio_read(src, cfg, dst_gbox)

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/odc/stac/_reader.py:194, in rio_read(src, cfg, dst_geobox, dst)
    188     if cfg.fail_on_error:
    189         log.error(
    190             "Aborting load due to failure while reading: %s:%d",
    191             src.uri,
    192             src.band,
    193         )
--> 194         raise e
    196 # Failed to read, but asked to continue
    197 log.warning("Ignoring read failure while reading: %s:%d", src.uri, src.band)

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/odc/stac/_reader.py:186, in rio_read(src, cfg, dst_geobox, dst)
    163 """
    164 Internal read method.
    165 
   (...)
    182 
    183 """
    185 try:
--> 186     return _rio_read(src, cfg, dst_geobox, dst)
    187 except rasterio.errors.RasterioIOError as e:
    188     if cfg.fail_on_error:

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/odc/stac/_reader.py:219, in _rio_read(src, cfg, dst_geobox, dst)
    209 def _rio_read(
    210     src: RasterSource,
    211     cfg: RasterLoadParams,
   (...)
    215     # if resampling is `nearest` then ignore sub-pixel translation when deciding
    216     # whether we can just paste source into destination
    217     ttol = 0.9 if cfg.nearest else 0.05
--> 219     with rasterio.open(src.uri, "r", sharing=False) as rdr:
    220         assert isinstance(rdr, rasterio.DatasetReader)
    221         ovr_idx: Optional[int] = None

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/rasterio/env.py:451, in ensure_env_with_credentials.<locals>.wrapper(*args, **kwds)
    448     session = DummySession()
    450 with env_ctor(session=session):
--> 451     return f(*args, **kwds)

File ~/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/rasterio/__init__.py:304, in open(fp, mode, driver, width, height, count, crs, transform, dtype, nodata, sharing, **kwargs)
    301 path = _parse_path(raw_dataset_path)
    303 if mode == "r":
--> 304     dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
    305 elif mode == "r+":
    306     dataset = get_writer_for_path(path, driver=driver)(
    307         path, mode, driver=driver, sharing=sharing, **kwargs
    308     )

File rasterio/_base.pyx:310, in rasterio._base.DatasetBase.__init__()

RasterioIOError: '/vsicurl/https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T13SDV.2022001T175739.v2.0/HLS.S30.T13SDV.2022001T175739.v2.0.B11.tif' not recognized as a supported file format.
# NameError below: HLS was never assigned because the previous load cell raised
HLS
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
Cell In[45], line 1
----> 1 HLS

NameError: name 'HLS' is not defined
# Lazily load the Sentinel items onto the same common grid
S = odc.stac.load(
    itemsS2,
    chunks={'x':256, 'y':256},
    geobox=GRID
)
S
<xarray.Dataset>
Dimensions:      (y: 221, x: 221, time: 190)
Coordinates:
  * y            (y) float64 4e+06 4e+06 3.999e+06 ... 3.891e+06 3.89e+06
  * x            (x) float64 3.998e+05 4.002e+05 ... 5.092e+05 5.098e+05
    spatial_ref  int32 32613
  * time         (time) datetime64[ns] 2022-01-01T18:04:02.903000 ... 2023-05...
Data variables: (12/19)
    B11          (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    B09          (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    VZA          (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    B03          (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    B04          (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    VAA          (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    ...           ...
    B8A          (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    SAA          (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    SZA          (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    B05          (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    B02          (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    browse       (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
#add platform coord that exists along time dim to track sensor
# Label every timestep with its sensor so provenance survives the concat below
L = L.assign_coords(platform=('time',['L8',]*L.time.size))
S = S.assign_coords(platform=('time',['S2',]*S.time.size))
common_names
{'L8': {'B03': 'green', 'B04': 'red', 'B05': 'nir08', 'B06': 'swir16'},
 'S2': {'B03': 'green', 'B04': 'red', 'B8A': 'nir08', 'B11': 'swir16'}}
#use common names mapping ('L8':{B03: green) ...
#this will rename only the vars in common names dict
S = S.rename_vars(common_names['S2'])
L = L.rename_vars(common_names['L8'])
bands = ['green','red','nir08','swir16','Fmask'] # bands we want to keep track of. browse images have no georef
#subset to just the bands we're interested in and combine sensors
ds = xr.concat([L[bands],S[bands]], dim='time') #lazy, but no alignment ... so time dim out of order? 
#check if the elements of time dim are in order --  they're not. S and L are 'stack'ed when concat, no time-aware merge
np.diff(ds.time.values).astype(int) > 0
array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True, False,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True])
# Verify monotonic time ordering -- False: concat simply stacked L after S
np.all(np.diff(ds.time.values).astype(int) > 0)
False
# Sort the combined dataset chronologically
ds = ds.sortby('time')
#add cloud cover estimate from STAC metadata
ds = ds.assign_coords(cloud_cover=df['eo:cloud_cover'])
#looks like can't do anything not lazy without getting full res NASA images and changing gdal settings? 
import os
# GDAL settings needed for authenticated access to NASA Earthdata cloud assets
os.environ['GDAL_HTTP_COOKIEJAR'] = '/tmp/cookie'
os.environ['GDAL_HTTP_COOKIEFILE'] = '/tmp/cookie'
os.environ['GDAL_DISABLE_READDIR_ON_OPEN'] = 'EMPTY_DIR'
# Normalized Difference Snow Index: (green - swir16) / (green + swir16)
ds['ndsi'] = (ds.green - ds.swir16) / (ds.green + ds.swir16)
ds['ndsi'].isel(time=1).plot()
/home/emmamarshall/miniconda3/envs/tidy_xr/lib/python3.11/site-packages/dask/core.py:119: RuntimeWarning: divide by zero encountered in divide
  return func(*(_execute_task(a, cache) for a in args))
<matplotlib.collections.QuadMesh at 0x7fc86879d850>
_images/83635dc1b9a94d7670f24dd6529f339b61d4067ad4fe3601b01fac28f242906c.png
# Inspect the per-timestep platform coordinate carried on each data variable
ds.green.platform
<xarray.DataArray 'platform' (time: 250)>
array(['S2', 'S2', 'L8', 'S2', 'S2', 'S2', 'S2', 'L8', 'S2', 'S2', 'L8',
       'S2', 'S2', 'S2', 'L8', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2', 'L8',
       'S2', 'S2', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2', 'L8', 'S2', 'S2',
       'S2', 'L8', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2', 'L8', 'S2', 'S2',
       'S2', 'S2', 'L8', 'S2', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2', 'L8',
       'S2', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2',
       'S2', 'L8', 'S2', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2', 'L8', 'S2',
       'S2', 'L8', 'S2', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2', 'S2', 'S2',
       'S2', 'L8', 'S2', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2', 'L8', 'S2',
       'S2', 'S2', 'L8', 'S2', 'S2', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2',
       'L8', 'S2', 'S2', 'S2', 'L8', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2',
       'L8', 'S2', 'S2', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2', 'L8', 'S2',
       'S2', 'S2', 'L8', 'S2', 'S2', 'S2', 'S2', 'S2', 'L8', 'S2', 'S2',
       'S2', 'S2', 'L8', 'S2', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2', 'L8',
       'S2', 'S2', 'S2', 'L8', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2', 'S2',
       'L8', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2',
       'L8', 'S2', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2', 'L8', 'S2', 'S2',
       'S2', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2',
       'L8', 'S2', 'S2', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2', 'L8', 'S2',
       'S2', 'S2', 'L8', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2', 'L8', 'S2',
       'S2', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2',
       'L8', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2',
       'S2', 'L8', 'S2', 'S2', 'L8', 'S2', 'S2', 'S2'], dtype='<U2')
Coordinates:
    spatial_ref  int32 32613
  * time         (time) datetime64[ns] 2022-01-01T18:04:02.903000 ... 2023-05...
    platform     (time) <U2 'S2' 'S2' 'L8' 'S2' 'S2' ... 'L8' 'S2' 'S2' 'S2'
    cloud_cover  (time) int64 100 6 85 34 19 67 3 24 2 ... 1 4 59 16 60 59 83 42
# Interactive view of one green-band timestep (data_aspect=1 keeps pixels square)
ds.green.isel(time=2).hvplot.image(data_aspect=1)
ds
<xarray.Dataset>
Dimensions:      (time: 250, y: 221, x: 221)
Coordinates:
  * y            (y) float64 4e+06 4e+06 3.999e+06 ... 3.891e+06 3.89e+06
  * x            (x) float64 3.998e+05 4.002e+05 ... 5.092e+05 5.098e+05
    spatial_ref  int32 32613
  * time         (time) datetime64[ns] 2022-01-01T18:04:02.903000 ... 2023-05...
    platform     (time) <U2 'S2' 'S2' 'L8' 'S2' 'S2' ... 'L8' 'S2' 'S2' 'S2'
    cloud_cover  (time) int64 100 6 85 34 19 67 3 24 2 ... 1 4 59 16 60 59 83 42
Data variables:
    green        (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    red          (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    nir08        (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    swir16       (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    Fmask        (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>
    ndsi         (time, y, x) float32 dask.array<chunksize=(1, 221, 221), meta=np.ndarray>